***********************
***********************

*	Rents, Resource Diversification, and the Collapse of Autocratic Regimes
*	Matthew Fails and Marc DuBuis
*	Forthcoming at Political Research Quartlery
*	This code: July 6, 2015

***********************
***********************

clear 
use "C:\Users\fails\Documents\My Dropbox\DuBuis - leader tenure\publication materials\data and code\FailsDuBuisReplicationData.dta", replace
tsset ccode year

*generate a sample marker for all autocracies as defined by GWF
*This variable becomes the sample identifier for all analyses

gen sample = .
replace sample = 0 if gwf_party==0 & gwf_party!=. & gwf_military==0 & gwf_military!=. & gwf_monarchy==0 & gwf_monarchy!=. & gwf_personal==0 & gwf_personal!=.
replace sample = 1 if gwf_party==1 | gwf_military==1  | gwf_monarchy==1  | gwf_personal==1 
label var sample "GWF Autocracy Sample"

sum gwf_fail if sample==1
tab gwf_fail if sample==1

*Some general data maintenance
*log values of income and population
gen lgdppc = log(rgdpch)
label var lgdppc "Log income per capita, PWT"

gen logpop = log(pop)
label var logpop "Log of population"

*creation of time polynomials and description of DV
generate tim = gwf_duration
generate tim2 = gwf_duration^2
generate tim3 = gwf_duration^3

sum tim

*gen laggrowth var
gen laggr = l.gr
label var laggr "One year lag in growth"

*generate Ross oil variable
gen ross_oilstate = 0
replace ross_oilstate =1 if oil_gas_valuePOP_2000>100
label var ross_oilstate "Oil producer, $100 per capita"

sort ccode year
gen coupatt = 0
replace coupatt = 1 if coup1==1 
label var coupatt "attempted coup during year"

gen lagcoupatt = l.coupatt

********************
********************

** Calculate key IV of interest as HH Index of Concentration

*STEP 1 -  calculate an version here such that I replace . with 0 if other sources of natural resources are not missing
**when natresrents is a non-zero value, I can treat the missing data as a zero; See Section Two of Web Appendix for Details

replace coalrents = 0 if coalrents==. & natresrents!=.
replace forestrents = 0 if forestrents==. & natresrents!=.
replace mineralrents = 0 if mineralrents==. & natresrents!=.
replace natgasrents = 0 if natgasrents==. & natresrents!=.
replace oilrents = 0 if oilrents==. & natresrents!=.

**measure of ODA uses AidData, which does not need missing data adjustment as above

generate totalrent2 = coalrents+forestrents+mineralrents+natgasrents+oilrents+odarents_AD
label var totalrent2 "total rents as percentage of GDP, using AidData"

*STEP 2 -- We want to calculate the measure of diversity, excluding non-rents. To do this
*	we would take first calculate the measure of totalrents, and then divide each component
*	by total rent to weight each component by the overall volume of rent.
*	In other words, rescale so that each component is not as a percent of GDP, but as a percent 
*	of all unearned income combined. Code for this is below

gen coal_a = coalrents/totalrent2 if totalrent2>=0 & totalrent2<=100 & coalrents!=.
gen forest_a = forestrents/totalrent2 if totalrent2>=0 & totalrent2<=100 & forestrents!=.
gen mineral_a = mineralrents/totalrent2 if totalrent2>=0 & totalrent2<=100 & mineralrents!=.
gen natgas_a = natgasrents/totalrent2 if totalrent2>=0 & totalrent2<=100 & natgasrents!=.
gen oil_a = oilrents/totalrent2 if totalrent2>=0 & totalrent2<=100 & oilrents!=.
gen oda_a = odarents_AD/totalrent2 if totalrent2>=0 & totalrent2<=100 & odarents_AD!=. 

*STEP 3 - then take the sum of the squared terms
gen coal_a2 = coal_a^2
gen forest_a2 = forest_a^2
gen mineral_a2 = mineral_a^2
gen natgas_a2 = natgas_a^2
gen oil_a2 = oil_a^2
gen oda_a2 = oda_a^2

gen hhi_a = coal_a2+forest_a2+mineral_a2+natgas_a2+oil_a2+oda_a2
label var hhi_a "HHI for rent sources, adjusted as share of all totalrent"

gen diverse_a = 1-hhi_a
label var diverse_a "Rent diversification"

gen divadiv = diverse_a*diverse_a
label var divadiv "quadratic term for diverse_a, excluding nonrents"	
	

*************TABLE 2 - PROBIT REGRESSION MODELS

*MODEL 1 - ESTIMATING NONLINEAR EFFECT OF RENT DIVERSIFICATION, SLIM SPECIFICATION
probit gwf_fail diverse_a divadiv totalrent2 lgdppc laggr lagcoupatt gwf_party  tim tim2 tim3 if sample==1,  cluster("gwf_casename")
outreg2 using "c:\data\DuBuis\m1.ascii", replace bdec(2) coefastr 

*MODEL 2 - INCLUDING THREE ADDITIONAL CONTROL VARIABLES
probit gwf_fail diverse_a divadiv totalrent2 lgdppc laggr lagcoupatt gwf_party physint logpop frac_rel tim tim2 tim3 if sample==1,  cluster("gwf_casename")
outreg2 using "c:\data\DuBuis\m1.ascii", replace bdec(2) coefastr 


*******SUBSTANTIVE EFFECTS*****************

* model 1 - substantive effects
probit gwf_fail diverse_a divadiv totalrent2 lgdppc laggr lagcoupatt gwf_party  tim tim2 tim3 if sample==1,  cluster("gwf_casename")
sum diverse_a divadiv totalrent2 lgdppc laggr lagcoupatt gwf_party  tim tim2 tim3 if e(sample)
centile diverse_a if e(sample), centile(25 33 45 50 66 75)

estsimp probit gwf_fail diverse_a divadiv totalrent2 lgdppc laggr lagcoupatt gwf_party  tim tim2 tim3 if sample==1,  cluster(gwf_casename)

setx mean
simqi, fd(prval(1)) changex(diverse_a 0 .22 divadiv 0 0.05) /*min to 33% of sample*/
simqi, fd(prval(1)) changex(diverse_a .22 .49 divadiv 0.05 0.24 ) /*33% to 66% of sample*/
simqi, fd(prval(1)) changex(diverse_a .49 .8 divadiv 0.24 0.64 ) /*66% to max of sample*/

drop b*

* model 2 - substantive effects - these are the effects reported in the manuscript
probit gwf_fail diverse_a divadiv totalrent2 lgdppc laggr lagcoupatt gwf_party physint logpop frac_rel tim tim2 tim3 if sample==1 & diverse_a>=0,  cluster("gwf_casename")
sum diverse_a divadiv totalrent2 lgdppc laggr lagcoupatt gwf_party physint logpop frac_rel tim tim2 tim3 if e(sample)
centile diverse_a if e(sample), centile(25 33 45 50 66 75)
centile totalrent2 if e(sample), centile(25 33 50 66)

estsimp probit gwf_fail diverse_a divadiv totalrent2 lgdppc laggr lagcoupatt gwf_party physint logpop frac_rel tim tim2 tim3 if sample==1 & diverse_a>=0,  cluster(gwf_casename)

* key variable - simulations moving from min to 33%, set others at mean, gwf and coup as mode(0)

setx mean
setx lagcoupatt 0 gwf_party 0
setx diverse_a 0 divadiv 0
simqi, prval(1)
setx diverse_a 0.28 divadiv 0.08
simqi, prval(1)
simqi, fd(prval(1)) changex(diverse_a 0 .28 divadiv 0 0.08) /*min to 33% of sample*/

setx diverse_a .28 divadiv 0.08
simqi, prval(1)
setx diverse_a 0.50 divadiv 0.25
simqi, prval(1)
simqi, fd(prval(1)) changex(diverse_a .28 .50 divadiv 0.08 0.25 ) /*33% to 66% of sample*/


***Some comparative measures with coup and party
setx mean
setx divadiv 0.14 /*set this to squared value of mean of diverse_a*/
setx totalrent2 0
simqi, prval(1)
setx totalrent2 4.9
simqi, prval(1)
simqi, fd(prval(1)) changex(totalrent2 0 4.9)

setx mean 
setx gwf_party 0
setx divadiv 0.14 /*set this to squared value of mean of diverse_a*/
setx lagcoupatt 0
simqi, prval(1)
setx lagcoupatt 1
simqi, prval(1)
simqi, fd(prval(1)) changex(lagcoupatt 0 1)

setx mean 
setx lagcoupatt 0
setx divadiv 0.14 /*set this to squared value of mean of diverse_a*/
setx gwf_party 0
simqi, prval(1)
setx gwf_party 1
simqi, prval(1)
simqi, fd(prval(1)) changex(gwf_party 0 1)

drop b*
	
	
****ROBUSTNESS ANALYSIS, USING FULL MODEL AS STARTING POINT
*Check 1 - re-estimate model while exlcuding values where diverse_a = 0 (i.e. make sure effect is not driven entirely by moving from 1 source of rent to more than 1)
probit gwf_fail diverse_a divadiv totalrent2 lgdppc laggr lagcoupatt gwf_party physint logpop frac_rel tim tim2 tim3 if sample==1 & diverse_a>0,  cluster("gwf_casename")		
outreg2 using "c:\data\DuBuis\m3.ascii", replace bdec(2) coefastr 

*Check 2 - Are the results robust to considering oil and gas as a general category of fuel rents? This require a new calculation of the key variables

gen fuel = oilrents+natgasrents
gen fuel_a = fuel/totalrent2 if totalrent2>=0 & totalrent2<=100 & fuel!=.
gen fuel_a2 = fuel_a^2
gen hhi_ac = coal_a2+forest_a2+mineral_a2+fuel_a2+oda_a2
label var hhi_ac "HHI for rent sources, adjusted as share of all totalrent, combined fuel rents"
gen diverse_ac = 1-hhi_ac
label var diverse_ac "Rent diversification, excluding nonrents"
sum diverse_ac
pwcorr diverse_a diverse_ac, sig obs
gen divACdiv = diverse_ac*diverse_ac
label var divACdiv "quadratic term for diverse_ac, excluding nonrents, combined fuel rents"	
	
*now run the regressions again, full specification 
probit gwf_fail diverse_ac divACdiv totalrent2 lgdppc laggr lagcoupatt gwf_party physint logpop frac_rel tim tim2 tim3 if sample==1,  cluster("gwf_casename")
outreg2 using "c:\data\DuBuis\m1.ascii", replace bdec(2) coefastr 
	
	
*Check 3 - are the results (from the model in Table 2 model 3 robust to including a variable for oil dummies?
* this model uses "ross oil state" variable, where 1 = oil income per capita > $100
probit gwf_fail diverse_a divadiv totalrent2 ross_oilstate lgdppc laggr lagcoupatt gwf_party physint logpop frac_rel tim tim2 tim3 if sample==1 & diverse_a>=0,  cluster("gwf_casename")
outreg2 using "c:\data\DuBuis\m1.ascii", replace bdec(2) coefastr 

*Check 4 - includes dummy for oil state but excludes measure of total rent dependence
probit gwf_fail diverse_a divadiv ross_oilstate lgdppc laggr lagcoupatt gwf_party physint logpop frac_rel tim tim2 tim3 if sample==1 & diverse_a>=0,  cluster("gwf_casename")
outreg2 using "c:\data\DuBuis\m1.ascii", replace bdec(2) coefastr 
	

*Check 5 and 6 - are the results robust to excluding aid as a component of diversity index and totalrents as control variable?
*this requires re-codeing the main variables to exclude aid

gen totalnoaid = coalrents+forestrents+mineralrents+natgasrents+oilrents
label var totalnoaid "total rents as percentage of GDP, no ODA"

gen coal_na = coalrents/totalnoaid if totalnoaid>=0 & totalnoaid<=100 & coalrents!=.
gen forest_na = forestrents/totalnoaid if totalnoaid>=0 & totalnoaid<=100 & forestrents!=.
gen mineral_na = mineralrents/totalnoaid if totalnoaid>=0 & totalnoaid<=100 & mineralrents!=.
gen natgas_na = natgasrents/totalnoaid if totalnoaid>=0 & totalnoaid<=100 & natgasrents!=.
gen oil_na = oilrents/totalnoaid if totalnoaid>=0 & totalnoaid<=100 & oilrents!=.

*if there is only one source of rents, then dividing by totalrents will lead to an adjusted score of 1
*then, when you eventually take 1-hhi, since hhi will be 1, you get a diversity score of 1

*then take the sum of the squared terms
gen coal_na2 = coal_na^2
gen forest_na2 = forest_na^2
gen mineral_na2 = mineral_na^2
gen natgas_na2 = natgas_na^2
gen oil_na2 = oil_na^2

gen hhi_na = coal_na2+forest_na2+mineral_na2+natgas_na2+oil_na2
label var hhi_na "HHI for rent sources, adjusted as share of all totalrent, exlcuding ODA"

gen diverse_na = 1-hhi_na
label var diverse_na "Rent diversification, excluding nonrents, exlcuding ODA"

gen div_na_div = diverse_na*diverse_na
label var div_na_div "quadratic term for diverse_na, excluding nonrents, excluding ODA"	

*column 5 regression - excludes aid from calculations but includes oil state dummy
probit gwf_fail diverse_na div_na_div totalnoaid ross_oilstate lgdppc laggr lagcoupatt gwf_party physint logpop frac_rel tim tim2 tim3 if sample==1 & diverse_na>=0,  cluster("gwf_casename")
outreg2 using "c:\data\DuBuis\m1.ascii", replace bdec(2) coefastr 
	
*colum 6 regression - excludes aid from calculations but includes oil state dummy; excludes measure of total rents (measured to exclude aid)
probit gwf_fail diverse_na div_na_div  ross_oilstate lgdppc laggr lagcoupatt gwf_party physint logpop frac_rel tim tim2 tim3 if sample==1 & diverse_na>=0,  cluster("gwf_casename")
outreg2 using "c:\data\DuBuis\m1.ascii", replace bdec(2) coefastr 

	
************************Other empirical elements in the paper to report re: substitutibility****************************

** Do countries with higher levels of rent diversification have smaller standard deviations in terms of government spending as % GDP?
* see pages ___ in article

*first, generate an identifier for the estimation sample
probit gwf_fail diverse_a divadiv totalrent2 lgdppc laggr lagcoupatt gwf_party tim tim2 tim3 if sample==1,  cluster("gwf_casename")
generate estsamp = 1 if e(sample)

sort gwf_casename
by gwf_casename: egen govdev = sd(govspend)	
label var govdev "Standard deviation of government spending as % GDP"
sort ccode year

*From a regression standpoint
reg govdev diverse_a if estsamp==1, robust
reg govdev diverse_a lgdppc  if estsamp==1, robust

*Comparing means for "high" and "low" rent diversification observations
sum diverse_a if estsamp==1
*	use .35 as cut point for sample mean
sum govdev if diverse_a<.35 & estsamp==1
sum govdev if diverse_a>=.35 & estsamp==1


*what is the intra-panel correlation between the major components of the resource diversification index?
**The components of the measure are: coal_a2+forest_a2+mineral_a2+natgas_a2+oil_a2+oda_a2

by ccode: gen allnat = (coal_a2+forest_a2+mineral_a2+natgas_a2+oil_a2)

by ccode: egen corr_all=corr(allnat oda_a2)
sum corr_all if estsamp==1
*this shows that the average correlation within panels in the estimation sample between the sum of natural resource rents and ODA is -.68 
*in other words, as natural resource rents go down, aid also goes down

*how about the average correlation between nat gas and oil?
by ccode: egen corrfuel=corr(natgas_a2 oil_a2)
sum corrfuel if estsamp==1

*how about the average correlation between oil and aid?
by ccode: egen corr_oa = corr(oil_a2 oda_a2)
sum corr_oa if estsamp==1



** PIE CHART CODE
*start with the main model, full specification
probit gwf_fail diverse_a divadiv totalrent2 lgdppc laggr lagcoupatt gwf_party physint logpop frac_rel tim tim2 tim3 if sample==1 & diverse_a>=0,  cluster("gwf_casename")		
hilo diverse_a country year if e(sample) & year==2005

*These resource_a variables are already adjusted to reflect that resource as a share of all rent sources
*to put them into a table form, I should multiply the raw value by 100, so for instance, a score of 1 beceomes 100% of total rents
*a score of .5  becomes 50% of all rent sources

list totalrent2 coal_a forest_a mineral_a natgas_a oil_a oda_a hhi_a diverse_a if country=="Angola" & year==2005
list totalrent2 coal_a forest_a mineral_a natgas_a oil_a oda_a hhi_a diverse_a if country=="Sudan" & year==2005
list totalrent2 coal_a forest_a mineral_a natgas_a oil_a oda_a  hhi_a diverse_a if country=="Kazakhstan" & year==2005



**************************
**************************
*	APPENDIX MATERIALS
**************************
**************************



*** Section 2 - different decision on how to treat missing data
**This is contained in a separate do file - see additional files


*** Section 3 - additional robustness tests concerning the issue of government capture of available rents

**Start my creating a dummy variable for NOC presence, based on appendix in Cheon et al (CPS 2015)
gen noc = 0
replace noc = 1 if country== "Algeria" & year>=  1963 
replace noc = 1 if country== "Angola" & year>=  1976 
replace noc = 1 if country== "Argentina" & year>=  2004 
replace noc = 1 if country== "Azerbaijan" & year>=  1992 
replace noc = 1 if country== "Bahrain" & year>= 1976 
replace noc = 1 if country== "Bangladesh" & year>=  1972 
replace noc = 1 if country== "Belarus" & year>= 1966 
replace noc = 1 if country== "Bolivia" & year>= 1937 
replace noc = 1 if country== "Brazil" & year>=1953 
replace noc = 1 if countrycode== "BRN" & year>=2002 
replace noc = 1 if country== "Cameroon" & year>=1980 
replace noc = 1 if country== "Chad" & year>=2006 
replace noc = 1 if country== "Chile" & year>= 1950 
replace noc = 1 if country== "China" & year>=1982 
replace noc = 1 if country== "Colombia" & year>=1951 
replace noc = 1 if countrycode== "COG" & year>= 1998 
replace noc = 1 if countrycode== "CIV" & year>= 1975
replace noc = 1 if country== "Ecuador" & year>=1972 
replace noc = 1 if country== "Egypt" & year>= 1956 
replace noc = 1 if country== "Equatorial Guinea" & year>= 2002
replace noc = 1 if country== "France" & year>= 1946
replace noc = 1 if country== "Ghana" & year>=1983
replace noc = 1 if country== "India" & year>=1956 
replace noc = 1 if country== "Indonesia" & year>=1957 
replace noc = 1 if countrycode== "IRN" & year>=1948 
replace noc = 1 if country== "Iraq" & year>=1966 
replace noc = 1 if country== "Italy" & year>=1953 
replace noc = 1 if country== "Japan" & year>= 1967
replace noc = 1 if country== "Kazakhstan" & year>=2002 
replace noc = 1 if country== "Kenya" & year>=1981
replace noc = 1 if countrycode== "KOR" & year>=1979
replace noc = 1 if country== "Kuwait" & year>=1980 
replace noc = 1 if country== "Libya" & year>=1970
replace noc = 1 if country== "Malaysia" & year>=1974 
replace noc = 1 if country== "Mauritania" & year>= 2004
replace noc = 1 if country== "Mexico" & year>= 1938
replace noc = 1 if country== "Morocco" & year>= 1981 
replace noc = 1 if country== "Mozambique" & year>=1981 
replace noc = 1 if country== "Nigeria" & year>=1971 
replace noc = 1 if country== "Norway" & year>=1972
replace noc = 1 if country== "Oman" & year>= 1925 
replace noc = 1 if country== "Pakistan" & year>= 1961 
replace noc = 1 if country== "Peru" & year>=1969 
replace noc = 1 if country== "Philippines" & year>= 1973 
replace noc = 1 if country== "Qatar" & year>=1958
replace noc = 1 if countrycode== "RUS" & year>=1989 
replace noc = 1 if countrycode== "STP" & year>= 1998
replace noc = 1 if country== "Saudi Arabia" & year>=1973 
replace noc = 1 if country== "South Africa" & year>= 1965 
replace noc = 1 if country== "Sudan" & year>=1997 
replace noc = 1 if countrycode== "SYR" & year>=1974 
replace noc = 1 if country== "Taiwan" & year>=1946
replace noc = 1 if country== "Tanzania" & year>= 1969 
replace noc = 1 if country== "Thailand" & year>=1978 
replace noc = 1 if countrycode== "TTO" & year>= 1974 
replace noc = 1 if country== "Tunisia" & year>=1972 
replace noc = 1 if country== "Turkey" & year>=1954
replace noc = 1 if country== "Turkmenistan" & year>=1996 
replace noc = 1 if countrycode== "UGA" & year>= 2008 
replace noc = 1 if country== "Ukraine" & year>= 1998 
replace noc = 1 if countrycode== "ARE" & year>=1971 
replace noc = 1 if country== "Uzbekistan" & year>= 1992 
replace noc = 1 if country== "Venezuela" & year>=1975 
replace noc = 1 if country== "Vietnam" & year>=1977 
replace noc = 1 if countrycode== "YEM" & year>=1996
label var noc "National Oil Company"


**********These are the reported models in Appendix C
*The idea is that each of these is consistent behavior with the assumption that gov'ts appropriate rents when they are there
reg nontaxtotalgdp totalrent2 lgdppc if estsamp==1, cluster(gwf_casename)
outreg2 using "c:\data\DuBuis\m1.ascii", replace bdec(2) coefastr 

reg soegdp totalrent2 lgdppc if estsamp==1, cluster(gwf_casename)
outreg2 using "c:\data\DuBuis\m1.ascii", replace bdec(2) coefastr 

logit noc totalrent2 lgdppc if estsamp==1, cluster(gwf_casename)
outreg2 using "c:\data\DuBuis\m1.ascii", replace bdec(2) coefastr 

probit gwf_fail diverse_a divadiv totalrent2 lgdppc laggr lagcoupatt gwf_party  taxrevgdp tim tim2 tim3 if sample==1 & diverse_a>=0,  cluster("gwf_casename")
outreg2 using "c:\data\DuBuis\m1.ascii", replace bdec(2) coefastr 

probit gwf_fail diverse_a divadiv totalrent2 lgdppc laggr lagcoupatt gwf_party  taxrevpc tim tim2 tim3 if sample==1 & diverse_a>=0,  cluster("gwf_casename")
outreg2 using "c:\data\DuBuis\m1.ascii", replace bdec(2) coefastr 


***Section 1 - Descriptive statistics - run this regression first, to get the sample identifier
probit gwf_fail diverse_a divadiv totalrent2 lgdppc laggr lagcoupatt gwf_party  tim tim2 tim3 if sample==1 & diverse_a>=0,  cluster("gwf_casename")
sum gwf_fail diverse_a divadiv diverse_ac divACdiv diverse_na div_na_div totalrent2 lgdppc laggr lagcoupatt physint logpop frac_rel gwf_party ross_oilstate  nontaxtotalgdp  soegdp noc taxrevgdp taxrevpc tim tim2 tim3 if e(sample)


